# Run preamble
source("code/preamble.R")

# Load data
# Studies 1 and 4 are read in unchanged here
study1_data <- readRDS(file = "data/study1_data.rds")
study4_data <- readRDS(file = "data/study4_data.rds")

# Study 2: read, then keep only rows in the "dist_no_incen" condition
study2_data <- subset(
  readRDS(file = "data/study2_data.rds"),
  condition == "dist_no_incen"
)

# Study 3: read, then keep only rows in the "dist_no_correct" condition
# (dropping treated participants from unrelated experiment)
study3_data <- subset(
  readRDS(file = "data/study3_data.rds"),
  condition == "dist_no_correct"
)

# Study 5: read, then keep only rows in the "dist_no_correct" condition
# (dropping treated participants from unrelated experiment)
study5_data <- subset(
  readRDS(file = "data/study5_data.rds"),
  condition == "dist_no_correct"
)

# Subset and clean Study 4 data
study4_data <- study4_data %>%
  subset(
    # Drop rows with a missing pid2 (Independents) and rows in either placebo
    # condition; rows where the test evaluates to NA are dropped as well
    !(
      is.na(pid2) |
        condition == "Blue vs. Pink" |
        condition == "Red Sox vs. Yankees"
    )
  ) %>%
  mutate(
    # Policy column: the condition label as a factor with a fixed level order
    policy = factor(
      as.character(condition),
      levels = c("Gun Control", "Border Control", "Abortion Access")
    )
  )

# Rename variables
# Give the Study 5 columns the names used for the shared columns selected below
study5_data <- rename(
  study5_data,
  out_mean = per_out_mean,
  out_sd = per_out_sd
)

# Bind data from different studies
# Stack the five studies row-wise, then keep only the analysis columns
bound_data <- list(
  study1_data, study2_data, study3_data, study4_data, study5_data
) %>%
  bind_rows() %>%
  select(
    # Outcomes
    out_therm, comfort_social_outparty, outparty_will_violate_dem,
    # Perceived-distribution moments
    out_mean, out_sd, in_mean, in_sd, dem_mean, rep_mean,
    # Partisanship, demographics, and design variables
    pid2, pid6, gender, race, latinx, edu_factor, age_factor, study, policy,
    par_pos
  )

# Generate new variables
#
# The *_stereo columns re-orient a position measure by reflecting it (10 - x)
# on the Democratic side and leaving it unchanged on the Republican side.
# NOTE(review): this presumes the underlying measures run from 0 to 10 - confirm.
bound_data <- bound_data %>%
  mutate(
    # Participant's own position, oriented by the participant's party;
    # rows with any other (or missing) pid2 become NA
    par_pos_stereo = case_when(
      pid2 == "Democrat" ~ 10 - par_pos,
      pid2 == "Republican" ~ par_pos
    ),
    # Perceived out-party mean. Which party it describes is inferred by
    # matching it against dem_mean / rep_mean. near() is used instead of `==`
    # so that floating-point noise cannot turn a genuine match into an NA.
    # NOTE(review): when dem_mean and rep_mean coincide, the first branch wins
    # and the value is reflected regardless of the participant's party -
    # confirm that this is intended.
    out_mean_stereo = case_when(
      near(out_mean, dem_mean) ~ 10 - out_mean,
      near(out_mean, rep_mean) ~ out_mean
    ),
    # Perceived in-party mean, oriented the same way (same tie caveat applies)
    in_mean_stereo = case_when(
      near(in_mean, dem_mean) ~ 10 - in_mean,
      near(in_mean, rep_mean) ~ in_mean
    )
  ) %>%
  # Drop any grouping carried over from the input data so that later
  # whole-column operations are not computed within groups
  ungroup() %>%
  mutate(
    # Fold pid6 around 2.5 and shift so that the two values adjacent to 2.5
    # map to 0 (e.g. 0..5 -> 2, 1, 0, 0, 1, 2)
    # NOTE(review): presumes pid6 is coded 0-5 - confirm.
    pid_extrem = abs(pid6 - 2.5) - .5
  )

# Standardize variables
# Each listed column is passed through scale(); as.vector() strips the matrix
# wrapper that scale() returns so the column stays a plain vector
bound_data <- bound_data %>%
  mutate(
    across(
      all_of(
        c(
          "out_therm", "comfort_social_outparty", "outparty_will_violate_dem",
          "out_mean_stereo", "out_sd", "pid_extrem"
        )
      ),
      ~ as.vector(scale(.x))
    )
  )

# Factorize study variable
bound_data <- bound_data %>%
  mutate(study = as.factor(study))

##################
### RUN MODELS ###
##################

# Calculate correlation between perceived-distribution means and point estimates
# (uses the Study 5 columns post_dist_out_mean and out_mean)
cor_test <- cor.test(
  x = study5_data$post_dist_out_mean,
  y = study5_data$out_mean
)

## Print the correlation estimate and its p-value
cat(
  "\n\n",
  " Correlation between perceived-distribution means and point estimates:",
  "\n",
  " r =", cor_test[["estimate"]],
  "\n",
  " p =", cor_test[["p.value"]]
)

# Stash formulae components
# Dependent variables: one correlation and one regression is run per entry
DVs <- c(
  "out_therm", "comfort_social_outparty", "outparty_will_violate_dem"
)

# Predictors whose coefficients are reported
IVs <- c("out_mean_stereo", "out_sd", "pid_extrem")

# Covariates entered alongside the IVs
controls <- c(
  "policy", "pid2", "gender", "race", "latinx", "edu_factor", "age_factor",
  "par_pos_stereo", "study"
)

# Initialize empty data-frame for correlation outputs
corr_outputs <- data.frame()

# Correlate out_mean_stereo with each DV in turn
for (dv in DVs) {
  tryCatch(
    {
      # Run correlation, tidy it into a one-row table, and tag it with the DV.
      # `[[` looks the column up by the string held in `dv`.
      corr <- cor.test(
        bound_data[["out_mean_stereo"]],
        bound_data[[dv]]
      ) %>%
        tidy() %>%
        mutate(DV = dv)

      # Prepend the new row, so the last DV processed ends up first
      corr_outputs <- rbind(corr, corr_outputs)
    },
    # Stay best-effort (skip this DV, keep going) but report the failure
    # instead of hiding it
    error = function(e) {
      warning(
        "Correlation for DV '", dv, "' failed and was skipped: ",
        conditionMessage(e),
        call. = FALSE
      )
    }
  )
}

# Initialize empty data-frame for regression outputs
regress_outputs <- data.frame()

# Fit one regression per DV
for (dv in DVs) {
  # Build the formula `<dv> ~ <IVs> + <controls>`
  model_formula <- reformulate(c(IVs, controls), response = dv)

  tryCatch(
    {
      # Run regression and store output
      regress <- lm_robust(formula = model_formula, data = bound_data)

      # Keep the fitted model in the global environment as `<dv>_model`;
      # the supplementary regression table refers to the models by these names
      assign(paste0(dv, "_model"), regress, envir = .GlobalEnv)

      # Keep only the IV rows of the tidied output and prepend them, so the
      # last DV processed ends up first
      regress_outputs <- regress %>%
        tidy() %>%
        subset(term %in% IVs) %>%
        rbind(regress_outputs)
    },
    # Stay best-effort (skip this DV, keep going) but report the failure;
    # a hidden failure would otherwise only surface later as a missing
    # `<dv>_model` object
    error = function(e) {
      warning(
        "Regression for DV '", dv, "' failed and was skipped: ",
        conditionMessage(e),
        call. = FALSE
      )
    }
  )
}

# Clean up IV and DV names
# factor(levels =, labels =) maps each raw name to its display label and fixes
# the level order in one step; any value not listed in `levels` becomes NA
regress_outputs <- regress_outputs %>%
  mutate(
    term = factor(
      term,
      levels = c("pid_extrem", "out_mean_stereo", "out_sd"),
      labels = c(
        "Partisan Identity Strength",
        "Extremity of Average Out-Partisan's Attitude\n(in Direction of Party Stereotype)",
        "Perceived Diversity of\nOut-Partisans' Attitudes"
      )
    ),
    outcome = factor(
      outcome,
      levels = c(
        "out_therm", "comfort_social_outparty", "outparty_will_violate_dem"
      ),
      labels = c(
        "Warmth Toward Out-Party",
        "Comfort Socializing with Out-Party",
        "Perception that Out-Party Supports Violating Democratic Norms"
      )
    )
  )

##############
### FIGURE ###
##############

# Plot main effects
# One panel per outcome; each predictor is drawn as a point estimate with its
# confidence interval, shaded by whether p >= .05
ggplot(
  data = regress_outputs,
  mapping = aes(x = estimate, y = term)
) +
  geom_pointrange(
    mapping = aes(
      xmin = conf.low, xmax = conf.high, color = (p.value >= .05)
    ),
    position = position_dodge(width = .4)
  ) +
  # Reference line at zero
  geom_vline(xintercept = 0, linetype = "dashed") +
  facet_wrap(~outcome, scales = "free_y", nrow = 5) +
  labs(x = "Cohen's D (Standardized Coefficient)", y = "Predictor\n") +
  scale_color_grey() +
  guides(
    color = "none", # hide the colour legend
    shape = guide_legend(nrow = 2)
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

# Save figure
# No plot object is passed, so ggsave() writes the most recent plot
ggsave(
  "figures/criterion_validity.png",
  device = "png",
  width = 9,
  height = 5
)

# Tell the reader where the figure was written
cat(
  "\n\n",
  " See criterion_validity.png for Figure 3."
)

############################################
### TABLES FOR SUPPLEMENTARY INFORMATION ###
############################################

# Correlations
# Adds a standard error for each correlation, swaps DV codes for display
# labels, and writes the table to a text file
corr_outputs %>%
  mutate(
    # SE computed as sqrt((1 - r^2) / df), with df taken from `parameter`
    SE = sqrt((1 - estimate^2) / parameter),
    # Map DV codes to labels; unlisted codes become NA
    DV = as.character(
      factor(
        DV,
        levels = c(
          "out_therm", "comfort_social_outparty", "outparty_will_violate_dem"
        ),
        labels = c(
          "Warmth Toward Out-Party",
          "Comfort Socializing with Out-Party",
          "Perception that Out-Party Supports Violating Democratic Norms"
        )
      )
    )
  ) %>%
  # Keep and rename the reported columns in one step
  select(
    Correlation = estimate,
    SE,
    `Degrees of Freedom` = parameter,
    `Dependent Variable` = DV
  ) %>%
  datasummary_df(
    output = "tables/correlations_w_dist_means.txt",
    title = "Correlations Between Perceived-Distribution Means and Documented
    Consequences of Misperceptions",
    notes = "Note: This table includes data from Studies 1--5."
  )

# Regressions
# The three fitted models (the `<dv>_model` objects stored in the global
# environment by the regression loop) are labelled and written to one table
list(
  "Warmth Toward Out-Party" = out_therm_model,
  "Comfort Socializing with Out-Party" = comfort_social_outparty_model,
  "Perception that Out-Party Supports Violating Democratic Norms" = outparty_will_violate_dem_model
) %>%
  modelsummary(
    output = "tables/moment_effects_on_attitudes.txt",
    title = "Predicting Documented Consequences of Misperceptions with
    Perceived-Distribution Moments",
    notes = "Note: The reference categories for factor variables are as
    follows: gun control (policy issue), Democrat (partisanship),
    man (gender), Asian (race), not Hispanic (whether Hispanic), high school
    degree or less (education), 18--25 years old (age), and Study 1 (study).",
    stars = TRUE,
    # NOTE(review): this vector is unnamed, so the labels are presumably
    # applied to coefficients by position - confirm that the order below
    # matches the models' coefficient order
    coef_rename = c(
      "(Intercept)",
      "Perceived Extremity of Average Out-Partisan's Policy Attitude (in Direction of Party Stereotype)",
      "Perceived Out-Party Attitude Diversity",
      "Partisan Identity Strength",
      "Policy Issue is Border Control",
      "Policy Issue is Abortion Access",
      "Participant is Republican",
      "Participant is a Woman",
      "Participant is Another Gender",
      "Participant is Black",
      "Participant is White",
      "Participant is Multi-Racial",
      "Participant is Another Race",
      "Participant is Hispanic",
      "Participant has an Associate's Degree",
      "Participant has a Bachelor's Degree",
      "Participant has a Post-graduate Degree",
      "Participant is 26--34 years old",
      "Participant is 35--49 years old",
      "Participant is 50--64 years old",
      "Participant is 65+ years old",
      "Extremity of Participant's Policy Attitude (in Direction of In-Party Stereotype)",
      "Study is Study 2",
      "Study is Study 3",
      "Study is Study 4",
      "Study is Study 5"
    )
  )

# Tell the reader where the regression table was written
cat(
  "\n\n",
  " See moment_effects_on_attitudes.txt for regressions of downstream attitudes on perceived-distribution moments (i.e., Supplementary Table 9)."
)
